####################################################################
#***2019 analysis
## Packages
# To install and open the R packages that you need for this code. 
# devtools supplies install_version(). library() is used instead of require()
# so that a missing devtools stops the script here with a clear error rather
# than failing on the next line.
library(devtools)
# Pin dotwhisker to version 0.6. This call is made unconditionally on every run.
install_version("dotwhisker", version = "0.6", repos = "http://cran.us.r-project.org")

# Packages used by this script: install whichever are missing, then attach all.
need <- c('tidyverse','readstata13','lfe','glue','rdrobust', 'stargazer','arm', 'broom', 'ggplot2', 'dotwhisker', 'gridExtra')
have <- need %in% rownames(installed.packages())
if (any(!have)) install.packages(need[!have])
invisible(lapply(need, library, character.only = TRUE))

# Change path to wherever you place the models
# To set up the working directory. 
# Folder containing this script, as reported by the RStudio API (this call
# requires the script to be open in RStudio).
script_folder <- dirname(rstudioapi::getSourceEditorContext()$path)
# Pass the path to setwd() directly: routing it through glue() would try to
# interpolate any "{...}" that happens to appear in the path.
setwd(script_folder)
# Clear the workspace (this also removes `need`, `have` and `script_folder`).
rm(list = ls())
# All relative paths below are resolved from the parent of the script folder.
setwd("../")

## load in RD data:
# NOTE(review): presumably this file provides `rd.data`, which is used below
# without being defined elsewhere in this script -- confirm.
load("5prepdata/final_rd_data.RData")

##################################################
#set some functions
##################################################

## state by state function:


# Collect the first coefficient and its standard error from a fitted model
# into a one-row data frame.
#
# x:     object with `beta` and `se` components (only the first element of
#        each is used).
# label: value stored alongside the estimate in the `label` column.
#
# Returns (invisibly) a data frame with columns `x.beta.1.`, `x.se.1.` and
# `label`.
extractcoef <- function(x, label) {
  estimate <- x$beta[1]
  std_error <- x$se[1]
  invisible(data.frame(x.beta.1. = estimate, x.se.1. = std_error, label = label))
}

# Bin a numeric vector into three intervals labelled "low", "med" and "high".
# cut() with a single number of breaks splits the range of `x` into intervals
# of equal width. Missing inputs stay missing in the result.
#
# x: numeric vector.
# Returns a factor of the same length as `x`.
group_var_values <- function(x) {
  bin_labels <- c("low", "med", "high")
  cut(x, breaks = 3, labels = bin_labels)
}

# Standardize mds1 over all non-missing rows (mean 0, standard deviation 1).
rd.data$mds1_std <- (function(v) {
  (v - mean(v, na.rm = TRUE)) / sd(v, na.rm = TRUE)
})(rd.data$mds1)

# Interactions of the win indicator with del.size and with standardized mds1.
rd.data$wonXMDSdel <- rd.data$won_election * rd.data$del.size
rd.data$wonXMDSstd <- rd.data$won_election * rd.data$mds1_std


# Fit the main regression-discontinuity specification on all of `rd.data`.
#
# x: outcome vector with one element per row of the global `rd.data`, in the
#    same row order.
#
# Steps: (1) select a bandwidth with rdbwselect() (local linear, triangular
# kernel, "mserd" selector, cutoff 0); (2) build triangular kernel weights
# from that bandwidth; (3) run a weighted felm() on observations with positive
# weight, with year and state fixed effects and standard errors clustered by
# state.
#
# Returns the fitted felm object.
main.specification <- function(x) {
  # `x` is matched to `rd.data` by position, so the lengths must agree.
  stopifnot(length(x) == nrow(rd.data))

  # Assignments to `rd.data` below modify a function-local copy only; the
  # global data frame is left untouched.

  # Pass the covariates to the bandwidth selector as separate columns.
  # (Combining them with `+` would collapse them into a single summed vector.)
  bw_covs <- cbind(
    rd.data$running_terms_served,
    rd.data$dem,
    rd.data$rep,
    rd.data$spc_elec,
    rd.data$term_length,
    rd.data$number_candidates,
    rd.data$year
  )
  bw_fit <- rdbwselect(x, rd.data$victory_marg, p = 1, c = 0, kernel = "tri",
                       bwselect = "mserd", covs = bw_covs)
  # Use the first entry of the selected bandwidths for every row.
  rd.data$bandwidth <- bw_fit$bws[1]

  # gen cutoff = 0
  rd.data$cutoff <- 0
  # gen kw = 1-(abs(cutoff-victory_marg))/bandwidth
  rd.data$kw <- 1 - (abs(rd.data$cutoff - rd.data$victory_marg)) / rd.data$bandwidth
  # replace kw = 0 if (victory_marg>cutoff+bandwidth | victory_marg<cutoff-bandwidth)
  rd.data$kw[rd.data$victory_marg > (rd.data$cutoff + rd.data$bandwidth) |
               rd.data$victory_marg < (rd.data$cutoff - rd.data$bandwidth)] <- 0

  # The outcome, the data and the weights are all restricted with the same
  # `kw > 0` mask so that they stay aligned row by row.
  out <- felm(x[rd.data$kw>0] ~ won_election  + victory_marg + f_x_win + running_terms_served + dem + rep + spc_elec + term_length + number_candidates | year + state | 0 | state, data = rd.data[rd.data$kw>0,], weights = rd.data$kw[rd.data$kw>0])
  return(out)
}


# Fit the RD specification with a professionalism interaction on the subset
# of `rd.data` whose grouping value equals `z`.
#
# x: outcome vector, one element per row of the global `rd.data`.
# y: grouping vector, one element per row of the global `rd.data`.
# z: the value of `y` that selects the estimation sample.
#
# The outcome is taken from `x`, restricted to the same rows as the estimation
# sample, so the dependent variable is the one the caller passes in.
#
# Returns the fitted felm object.
main.specification.subset.del <- function(x,y,z) {
  # `x` and `y` are matched to `rd.data` by position.
  stopifnot(length(x) == nrow(rd.data), length(y) == nrow(rd.data))

  # Rows belonging to the requested group; missing group values are excluded.
  in_group <- !is.na(y) & y == z
  estimation.sample <- rd.data[in_group, ]
  outcome <- x[in_group]

  # Bandwidth selection within the group (no covariates are passed here).
  bw_fit <- rdbwselect(outcome, estimation.sample$victory_marg, p = 1, c = 0,
                       kernel = "tri", bwselect = "mserd")
  # Use the first entry of the selected bandwidths for every row.
  estimation.sample$bandwidth <- bw_fit$bws[1]

  # gen cutoff = 0
  estimation.sample$cutoff <- 0
  # gen kw = 1-(abs(cutoff-victory_marg))/bandwidth
  estimation.sample$kw <- 1 - (abs(estimation.sample$cutoff - estimation.sample$victory_marg)) / estimation.sample$bandwidth
  # replace kw = 0 if (victory_marg>cutoff+bandwidth | victory_marg<cutoff-bandwidth)
  estimation.sample$kw[estimation.sample$victory_marg > (estimation.sample$cutoff + estimation.sample$bandwidth) |
                         estimation.sample$victory_marg < (estimation.sample$cutoff - estimation.sample$bandwidth)] <- 0

  # NOTE(review): fixed effects and clustering use `const` here, while the
  # other specifications in this file use `state` -- confirm this is intended.
  out <- felm(outcome[estimation.sample$kw>0] ~ won_election +wonXMDSstd + mds1_std + victory_marg + f_x_win + running_terms_served + dem + rep + spc_elec + term_length + number_candidates | year + const | 0 | const, data = estimation.sample[estimation.sample$kw>0,], weights = estimation.sample$kw[estimation.sample$kw>0])
  return(out)
}




# One model per delegation-size category ("low", "med", "high").
m1 <- main.specification.subset.del(
  rd.data$ever_runhouse, rd.data$size.rank.cat, "low"
)
m2 <- main.specification.subset.del(
  rd.data$ever_runhouse, rd.data$size.rank.cat, "med"
)
m3 <- main.specification.subset.del(
  rd.data$ever_runhouse, rd.data$size.rank.cat, "high"
)

# Write the three models side by side to a LaTeX table. Only the win
# indicator, the professionalism score and their interaction are displayed.
stargazer(
  list(m1, m2, m3),
  out = "7tex/manuscript/tables/sourcefiles/Appendix Table 18.tex",
  label = "tab:del_size_int",
  title = "Effect of state legislative service and professionalism by delegation size",
  style = "apsr",
  font.size = "scriptsize",
  model.numbers = TRUE,
  dep.var.labels = "Run for House",
  column.labels = c("Small Delegation", "Medium Delegation", "Large Delegation"),
  keep = c("won_election", "mds1_std", "wonXMDSstd"),
  order = c(1, 3, 2),
  covariate.labels = c("Won", "Professionalism", "Ever Won x Professionalism"),
  keep.stat = c("n", "rsq"),
  # (disabled) add.lines = list(c("Bandwidth", m1[[2]], m2[[2]], m3[[2]], m4[[2])),
  notes = "\\parbox[t]{\\linewidth}{Note: This table reports the same model specifications as in Table \\ref{tab:main_results}, but includes an interaction with a globally unit-standardized professionalism score for the state-chamber-year based on \\citet{bowen2014}. The sample is split by tertile of the size of the state's House delegation (time-varying). The dependent variable is equal to one if the candidate ever runs in the election listed in the column header and is zero otherwise. The sample contains all first-time state legislative elections within the optimal bandwidth based on the \\cite{CCT} algorithm. All regressions include state and election year fixed effects, a linear term in the election margin as well as its interaction with the indicator for having won, and the full set of candidate and election controls. Estimations are triangular kernel-weighted.  Standard errors clustered by state are reported in parentheses.}"
)







#################################################
#### decade models ####


# Fit the main RD specification on the rows of `rd.data` from one decade.
#
# x:          outcome vector that has ALREADY been restricted to the rows of
#             the requested decade, in the same row order as `rd.data`.
# decade.var: value of `decade` selecting the estimation sample.
#
# Returns the fitted felm object.
main.specification.subset.decade <- function(x, decade.var) {
  # Function-local copy restricted to the decade. filter() drops rows whose
  # `decade` is NA, so `x` must not contain entries for such rows.
  rd.data <- rd.data %>% filter(decade == decade.var)
  stopifnot(length(x) == nrow(rd.data))

  # Pass the covariates to the bandwidth selector as separate columns.
  # (Combining them with `+` would collapse them into a single summed vector.)
  bw_covs <- cbind(
    rd.data$running_terms_served,
    rd.data$dem,
    rd.data$rep,
    rd.data$spc_elec,
    rd.data$term_length,
    rd.data$number_candidates,
    rd.data$year
  )
  bw_fit <- rdbwselect(x, rd.data$victory_marg, p = 1, c = 0, kernel = "tri",
                       bwselect = "mserd", covs = bw_covs)
  # Use the first entry of the selected bandwidths for every row.
  rd.data$bandwidth <- bw_fit$bws[1]

  # gen cutoff = 0
  rd.data$cutoff <- 0
  # gen kw = 1-(abs(cutoff-victory_marg))/bandwidth
  rd.data$kw <- 1 - (abs(rd.data$cutoff - rd.data$victory_marg)) / rd.data$bandwidth
  # replace kw = 0 if (victory_marg>cutoff+bandwidth | victory_marg<cutoff-bandwidth)
  rd.data$kw[rd.data$victory_marg > (rd.data$cutoff + rd.data$bandwidth) |
               rd.data$victory_marg < (rd.data$cutoff - rd.data$bandwidth)] <- 0

  # The outcome, the data and the weights are all restricted with the same
  # `kw > 0` mask so that they stay aligned row by row.
  out <- felm(x[rd.data$kw>0] ~ won_election  + victory_marg + f_x_win + running_terms_served + dem + rep + spc_elec + term_length + number_candidates | year + state | 0 | state, data = rd.data[rd.data$kw>0,], weights = rd.data$kw[rd.data$kw>0])
  return(out)
}


# APPENDIX TABLE 22:

## baseline rate by decade:
# Mean of each outcome within every decade. Any existing grouping on
# `rd.data` is removed first so that only `decade` defines the groups.
rd.data %>%
  ungroup() %>%
  group_by(decade) %>%
  summarize(
    ever_runhouse = mean(ever_runhouse),
    ever_winhouse = mean(ever_winhouse)
  )



# Helper: keep only the `won_election` row of a fitted model's tidy() output
# and tag it with a label for plotting.
.tidy_won_effect <- function(fit, model_label) {
  fit %>%
    tidy() %>%
    filter(term == "won_election") %>%
    mutate(model = model_label)
}

# Helper: restrict an outcome vector (aligned with `rd.data`) to one decade.
# which() drops rows whose `decade` is NA, matching what filter() does inside
# main.specification.subset.decade(); plain logical indexing would instead
# insert NA elements for those rows and misalign the outcome with the data.
.outcome_in_decade <- function(outcome, decade_value) {
  outcome[which(rd.data$decade == decade_value)]
}

# Helper: estimate the decade-specific model and return its tidy win effect.
.decade_won_effect <- function(outcome, decade_value) {
  .tidy_won_effect(
    main.specification.subset.decade(
      .outcome_in_decade(outcome, decade_value), decade_value
    ),
    as.character(decade_value)
  )
}

# Outcome: ever ran for the House.
m.1970.runhouse <- .decade_won_effect(rd.data$ever_runhouse, 1970)
m.1980.runhouse <- .decade_won_effect(rd.data$ever_runhouse, 1980)
m.1990.runhouse <- .decade_won_effect(rd.data$ever_runhouse, 1990)
m.2000.runhouse <- .decade_won_effect(rd.data$ever_runhouse, 2000)
m.runhouse <- .tidy_won_effect(main.specification(rd.data$ever_runhouse), "Full Sample")

# Outcome: ever won a House seat.
m.1970.winhouse <- .decade_won_effect(rd.data$ever_winhouse, 1970)
m.1980.winhouse <- .decade_won_effect(rd.data$ever_winhouse, 1980)
m.1990.winhouse <- .decade_won_effect(rd.data$ever_winhouse, 1990)
m.2000.winhouse <- .decade_won_effect(rd.data$ever_winhouse, 2000)
m.winhouse <- .tidy_won_effect(main.specification(rd.data$ever_winhouse), "Full Sample")


# Dot-and-whisker plot of the `won_election` estimates for the run-for-House
# outcome: one point per decade plus the full sample, distinguished by shape.
# `term` is blanked so that all estimates share a single row on the y axis.
appendix_fig_3a <- dwplot(
  bind_rows(m.1970.runhouse, m.1980.runhouse, m.1990.runhouse, m.2000.runhouse, m.runhouse) %>%
    mutate(term = ""),
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2),
  dot_args = list(aes(shape = model))
) +
  theme_bw() +
  scale_y_discrete(breaks = NULL) +
  xlab("Coefficient estimate") +
  theme(panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_blank())
print(appendix_fig_3a)

# Save this specific plot object rather than relying on last_plot().
ggsave(filename = "7tex/manuscript/tables/sourcefiles/Appendix Figure 3a.pdf",
       plot = appendix_fig_3a, units = "in", width = 7, height = 2.5)


# Dot-and-whisker plot of the `won_election` estimates for the win-House
# outcome: one point per decade plus the full sample, distinguished by shape.
# `term` is blanked so that all estimates share a single row on the y axis.
appendix_fig_3b <- dwplot(
  bind_rows(m.1970.winhouse, m.1980.winhouse, m.1990.winhouse, m.2000.winhouse, m.winhouse) %>%
    mutate(term = ""),
  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2),
  dot_args = list(aes(shape = model))
) +
  theme_bw() +
  scale_y_discrete(breaks = NULL) +
  xlab("Coefficient estimate") +
  theme(panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_blank())
print(appendix_fig_3b)

# Save this specific plot object rather than relying on last_plot().
ggsave(filename = "7tex/manuscript/tables/sourcefiles/Appendix Figure 3b.pdf",
       plot = appendix_fig_3b, units = "in", width = 7, height = 2.5)



